* ---------------------------------------------------------------------------
* Session setup: pin the Stata version, start from an empty session, move to
* the build directory, expose the project ado folder and open the build log.
* ---------------------------------------------------------------------------
version 17.0
clear all
cd "MYPATH\derived\build_model_sample"
adopath + ../../ado/
capture log close

log using "build_v2.log", replace

* Project command, not defined in this file (presumably found on the adopath
* added above; what it sets up is not visible here - confirm).
preliminaries

* Create the output folders under the RESULTS and TEMP globals. mkdir runs
* under capture so an already-existing folder is not an error. Only the
* non-TEMP root gets a figures subfolder.
foreach root in RESULTS TEMP {
	capture mkdir "${`root'}\derived"
	capture mkdir "${`root'}\derived\build_model_sample"
	if "`root'" != "TEMP" {
		capture mkdir "${`root'}\derived\build_model_sample\figures"
	}
}

* Graph fonts: default proportional face, Consolas as the monospace face, for
* both the graph window and PostScript output.
foreach device in window ps {
	graph set `device' fontface default
}
foreach device in window ps {
	graph set `device' fontfacemono "Consolas"
}


program main
	* Entry point of the build. The steps below must run in this order: each
	* one either reads a file saved by an earlier step or continues working on
	* the dataset the previous step left in memory (grab_civil -> merge_lisa,
	* merge_kids -> merge_income_tiles).

	* ---- 1. build dataset ----
	clean_mfr

	combine_mfr_analysis
	prev_birth_issues
	merge_miscarriages

	grab_civil
	merge_lisa

	merge_kids
	merge_income_tiles

	* ---- 2. restrict the sample and save the final file ----
	* Rows survive when year is after 2010 or when they carry main_flag == 1.
	* A missing year counts as "after 2010", exactly as in a keep-if on
	* year > 2010.
	tabulate year if main_flag == 1
	drop if year <= 2010 & main_flag != 1
	save $TEMP\derived\build_model_sample\mfr_analysis_plus_issues_mis_LISA_kids.dta, replace

	* ---- 3. clean up intermediate datasets ----
	erase $TEMP\derived\build_model_sample\miscarriages.dta
	erase $TEMP\derived\build_model_sample\civil.dta
	erase $TEMP\derived\build_model_sample\allsampleyears.dta
	erase $TEMP\derived\build_model_sample\temp_2019.dta

	capture log close
end


program clean_mfr
	* Builds the MFR sample that is later appended to the analysis sample.
	* Steps, in order:
	*   1. load the raw file and turn the two due-date variables into daily dates
	*   2. impute a birth date and record which source it came from
	*   3. merge mother covariates (county, education, birth place, age)
	*   4. assign the screening wave and the KUB coverage type
	*   5. flag birth outcomes and diagnoses
	*   6. save mfr_for_merge.dta under the TEMP build folder
	* Relies on the user command personage and on program perinatal_issues.
	use lopnr childid bfoddat birth_flag nonsingleton num_birth_records bpsmdat bpuldat bdiag* ///
	  grdbs grvfv grdfv using "MYPATH\MYPATH.dta", clear

	quietly {
		di "clean bpuldat/bpsmdat/grdfv so that data can be appended"
		* Each date is read as text: characters 1-4 year, 5-6 month, 7-8 day,
		* then rebuilt with mdy() and stored back in the original variable.
		foreach stub in bpul bpsm {
			local raw `stub'dat
			tostring `raw', replace
			generate `raw'_year = substr(`raw', 1, 4)
			generate `raw'_month = substr(`raw', 5, 2) if length(`raw') > 4
			generate `raw'_day = substr(`raw', 7, 2) if length(`raw') > 5
			destring `raw'_year `raw'_month `raw'_day `raw', replace
			replace `raw' = mdy(`raw'_month, `raw'_day, `raw'_year)
			format `raw' %td
		}

		di "Extract birth date"
		* bfoddat is split as year*100 + month
		generate bfoddat_month = mod(bfoddat, 100)
		generate bfoddat_yr = floor(bfoddat/100)

		di "Calculate birth date = expected due date - 280 + gest age at birth"
		destring grdfv, replace
		* gestational age in days: grdbs when present, else grvfv*7 + grdfv
		generate gest_age_birth = cond(missing(grdbs), grvfv*7 + grdfv, grdbs)
		* source priority: bpuldat (flag 1), then bpsmdat (flag 2), then the
		* 15th of the bfoddat month (flag 3)
		generate birth_date = bpuldat - 280 + gest_age_birth
		generate birth_date_flag = 1 if !missing(birth_date)
		replace birth_date = bpsmdat - 280 + gest_age_birth if missing(birth_date)
		replace birth_date_flag = 2 if !missing(birth_date) & missing(birth_date_flag)
		replace birth_date = mdy(bfoddat_month, 15, bfoddat_yr) if missing(birth_date)
		replace birth_date_flag = 3 if missing(birth_date_flag)
		format birth_date %td
		label define bdflag 1 "bpuldat - 280 + gest_age_birth" 2 "bpsmdat - 280 + gest_age_birth" ///
		  3 "15th of bfoddat"
		label values birth_date_flag bdflag
		drop gest_age_birth

		* 99 = birth date taken from bfoddat itself; otherwise 1 when the
		* imputed month disagrees with the bfoddat month and 0 when it agrees
		generate impute_birth_month_discrep = 99
		generate b_m = month(birth_date)
		replace impute_birth_month_discrep = (b_m != bfoddat_month) if inlist(birth_date_flag, 1, 2)
		label define bmdisc 0 "No discrepancy" 1 "Discrepancy" 99 "Birth date from bfoddat"
		label values impute_birth_month_discrep bmdisc
		label variable impute_birth_month_discrep "flag whether imputed birth date month == bfoddat month"

		di "date of birth"
		label variable bfoddat "date of birth"
		generate dob = ym(bfoddat_yr, bfoddat_month)
		format dob %tm
		label variable dob "date of birth (year, month)"
	}

	tabulate impute_birth_month_discrep, missing
	di "count number of fetuses that made it past 21 weeks in pregnancy"
	bysort lopnr dob: generate fetus_count = _n
	label variable fetus_count "arbitrary rank of fetus within pregnancy"
	egen num_fetuses = max(fetus_count), by(lopnr dob)
	label variable num_fetuses "number of fetuses that made it past 21 weeks in pregnancy"

	di "merge other vars from LISA/Bakgrund to the mfr sample"
	* --- education and county in the bfoddat year ---
	rename bfoddat_yr year
	merge m:1 lopnr year using "$MYPATH\MYPATH.dta", ///
	  keepusing(lan educ) keep(1 3) assert(1 2 3) nogenerate
	* shift codes 0, 1, 2 to 1, 2, 3
	replace educ = educ + 1 if !missing(educ)
	label define lab_educ 1 "No college" 2 "Some college" 3 "Full college"
	label values educ lab_educ
	label variable educ "education year of due date"

	* --- education one year earlier ---
	* The merge key year is temporarily set to the previous year; the
	* same-year education is parked under due_dt_year_educ.
	rename year due_dt_year
	generate year = due_dt_year - 1
	rename educ due_dt_year_educ
	merge m:1 lopnr year using "$MYPATH\MYPATH.dta", ///
	  keepusing(educ) keep(1 3) assert(1 2 3) nogenerate
	* shift codes 0, 1, 2 to 1, 2, 3
	replace educ = educ + 1 if !missing(educ)
	label values educ lab_educ
	label variable educ "education year before due date"
	drop year
	rename due_dt_year year

	* --- mother background: birth place and birth year/month ---
	merge m:1 lopnr using "MYPATH\MYPATH.dta", ///
	  keepusing(fodelseland_fodgreg4 fodelsear fodelsemanad) keep(1 3) assert(1 2 3) nogenerate
	* foreign born: 1 = Swedish, 2 = Foreign; an empty birth place is coded 2
	rename fodelseland_fodgreg4 mother_birth_place
	generate mom_foreign = (mother_birth_place != "Sverige" | missing(mother_birth_place))
	replace mom_foreign = mom_foreign + 1
	label define lab_for 1 "Swedish" 2 "Foreign"
	label values mom_foreign lab_for
	* age: mother birth date is placed on the 15th of her birth month
	destring fodelsemanad fodelsear, replace
	generate temp_mother_birth = mdy(fodelsemanad, 15, fodelsear)
	format temp_mother_birth %td
	personage temp_mother_birth birth_date, gen(age)
	drop fodelsemanad fodelsear
	rename year bfoddat_yr

	* --- key variables for the mfr sample ---
	* date_preg is bpuldat, falling back to the imputed birth date
	generate date_preg = bpuldat
	replace date_preg = birth_date if missing(date_preg)
	generate year = year(date_preg)
	generate month = month(date_preg)
	generate m_year = ym(year, month)
	assert !missing(date_preg, m_year)

	* --- waves ---
	merge m:1 lan using $DATA\waves.dta, keep(1 3) assert(1 2 3) nogenerate
	* reference date is 210 days before date_preg
	generate week10_date = date_preg - 210
	generate wave = 1 if week10_date < dofm(wave2_intro)
	replace wave = 2 if week10_date >= dofm(wave2_intro)
	replace wave = 3 if week10_date >= dofm(wave3_intro)

	* --- KUB coverage type ---
	generate kub_type = 0 if wave == 1
	replace kub_type = 1 if !missing(wave) & wave > 1 & inlist(lan, 3, 4, 9, 12, 14, 20, 21, 22, 23, 24)
	replace kub_type = 2 if !missing(wave) & wave > 1 & inlist(lan, 1, 5, 6, 7, 8, 13, 17, 18, 19)
	* county-specific changes to the KUB offer, by reference date
	replace kub_type = 1 if lan == 19 & week10_date >= dofm(ym(2018, 1))
	replace kub_type = 2 if lan == 3 & week10_date >= dofm(ym(2016, 1))
	replace kub_type = 2 if lan == 20 & week10_date >= dofm(ym(2017, 11))
	replace kub_type = 2 if lan == 22 & week10_date >= dofm(ym(2015, 1))
	replace kub_type = 2 if lan == 23 & week10_date >= dofm(ym(2016, 4))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2014,6)) & week10_date <= dofm(ym(2014, 9))
	replace kub_type = 1 if lan == 17 & week10_date >= dofm(ym(2015,4)) & week10_date <= dofm(ym(2015, 9))

	replace kub_type = 3 if lan == 10 & wave == 2
	label define kub_t 1 "Age 35 KUB threshold" 2 "Universal KUB coverage" 3 "Age 38 KUB threshold"
	label values kub_type kub_t

	di "clean issues"
	* preterm flags use grdbs and only rows with birth_flag 1 or 4
	generate preterm_live = (grdbs <= 259 & inlist(birth_flag, 1, 4))
	generate vpreterm_live = (grdbs <= 224 & inlist(birth_flag, 1, 4))
	tabulate preterm_live
	tabulate vpreterm_live

	* all of the baby's diagnosis codes glued into one string (no separator)
	generate baby_diagnosis_birth = ""
	forvalues j = 1/12 {
		replace baby_diagnosis_birth = baby_diagnosis_birth + bdiag`j' if !missing(bdiag`j')
	}
	drop bdiag*
	quietly generate chrom_ab = (strpos(baby_diagnosis_birth, "Q9") > 0)
	perinatal_issues
	generate stillbirth = (birth_flag == 2)
	generate death_28 = (birth_flag == 3)

	di "save mfr sample"
	save $TEMP\derived\build_model_sample\mfr_for_merge.dta, replace
end


program combine_mfr_analysis
	* Stacks the main analysis sample on top of the MFR sample saved by
	* clean_mfr, then removes MFR rows that share a mother and birth month
	* (lopnr, dob) with any other row. The result stays in memory.
	di "append main analysis sample (which only contains singletons) to mfr sample"
	use $DATA\analysis_sample.dta, clear

	di "note: for dob, take bfoddat if possible (to be consistent), otherwise use date_preg"
	generate dob = ym(floor(bfoddat/100), mod(bfoddat, 100))
	replace dob = ym(year(date_preg), month(date_preg)) if dob == .
	format dob %tm
	label variable dob "date of birth (year, month): actual, then predicted if actual is missing"

	generate death_28 = (birth_flag == 3)
	* main_flag marks analysis-sample rows; appended MFR rows get missing
	generate main_flag = 1

	append using $TEMP\derived\build_model_sample\mfr_for_merge.dta
	erase $TEMP\derived\build_model_sample\mfr_for_merge.dta

	di "drop duplicates of analysis observations that are in the mfr"
	* Among rows duplicated on (lopnr, dob), only those with a missing
	* pregnancy id are dropped.
	duplicates tag lopnr dob, generate(ddup)
	tabulate ddup
	drop if pregnancy == . & ddup > 0
	drop ddup num_fetuses fetus_count
end

program prev_birth_issues
	* Works on the combined analysis + MFR data in memory and adds:
	*   preg_num     order of the row within mother (lopnr), sorted by dob
	*   tot_pregs    number of rows for the mother
	*   prev_X       1 when any EARLIER row of the same mother has X == 1,
	*                for each outcome X listed below, otherwise 0
	*   pregid_full  pregnancy id that also covers rows without one
	* The result is saved as mfr_analysis_plus_issues.dta in the TEMP build
	* folder and is also left in memory.
	di "by lopnr-dob, calculate number of various past birth issues"
	sort lopnr dob
	by lopnr: gen preg_num = _n
	by lopnr: egen tot_pregs = max(preg_num)
	sum preg_num

	* Outcomes to look back on: the headline flags plus the 12 perinatal groups
	local issues stillbirth death_28 preterm_live vpreterm_live chrom_ab congen_deform pnatal_issue
	forvalues num = 1/12 {
		local issues `issues' pnatal_issue`num'
	}

	foreach var of local issues {
		local label : variable label `var'
		* Within mother, in preg_num order: the running count of events up to
		* and including this row, minus this row's own event, is the number of
		* events on earlier rows. One pass per variable replaces a nested loop
		* over every (pregnancy number, lag) pair.
		qui bysort lopnr (preg_num): gen prev_`var' = (sum(`var' == 1) - (`var' == 1)) > 0
		if strpos("`var'", "pnatal_issue") != 0 label variable prev_`var' `"Prev `label'"'
	}

	* Create new preg id that includes those not in analysis sample
	assert main_flag == 1 if !mi(pregnancy)
	* Stored as double: the default float type holds integers exactly only up
	* to 16,777,216, beyond which distinct ids would collapse to one value.
	gen double pregid_full = pregnancy
	qui sum pregid_full
	* No existing id at all: start numbering the new ids from 1
	local max_preg = cond(missing(r(max)), 0, r(max))
	egen preg_temp = group(lopnr dob) if mi(pregnancy)
	replace pregid_full = `max_preg' + preg_temp if mi(pregnancy)
	drop preg_temp
	assert !mi(pregid_full)
	isid pregid_full

	save $TEMP\derived\build_model_sample\mfr_analysis_plus_issues.dta, replace
end

program perinatal_issues
	* Creates 0/1 diagnosis indicators from the string baby_diagnosis_birth,
	* which must already be in memory. A code group is flagged when any of its
	* three-character prefixes occurs anywhere in the string.
	*   pnatal_issue      any P
	*   pnatal_issue1-12  the P ranges named in the variable labels
	*   congen_deform     any of Q0 to Q8
	* The replace statements are left noisy on purpose so the log output is
	* the same as before.

	quietly generate pnatal_issue = (strpos(baby_diagnosis_birth, "P") > 0)
	label variable pnatal_issue "Any P ICD code"

	* P00-P04
	quietly generate pnatal_issue1 = 0
	label variable pnatal_issue1 "P00-P04"
	forvalues d = 0/4 {
		replace pnatal_issue1 = 1 if strpos(baby_diagnosis_birth, "P0`d'") > 0
	}

	* P05-P08
	quietly generate pnatal_issue2 = 0
	label variable pnatal_issue2 "P05-P08"
	forvalues d = 5/8 {
		replace pnatal_issue2 = 1 if strpos(baby_diagnosis_birth, "P0`d'") > 0
	}

	* P09
	quietly generate pnatal_issue3 = 0
	label variable pnatal_issue3 "P09"
	replace pnatal_issue3 = 1 if strpos(baby_diagnosis_birth, "P09") > 0

	* P10-P15
	quietly generate pnatal_issue4 = 0
	label variable pnatal_issue4 "P10-P15"
	forvalues d = 0/5 {
		replace pnatal_issue4 = 1 if strpos(baby_diagnosis_birth, "P1`d'") > 0
	}

	* P19, then P20-P29
	quietly generate pnatal_issue5 = 0
	label variable pnatal_issue5 "P19-P29"
	replace pnatal_issue5 = 1 if strpos(baby_diagnosis_birth, "P19") > 0
	forvalues d = 0/9 {
		replace pnatal_issue5 = 1 if strpos(baby_diagnosis_birth, "P2`d'") > 0
	}

	* P35-P39
	quietly generate pnatal_issue6 = 0
	label variable pnatal_issue6 "P35-P39"
	forvalues d = 5/9 {
		replace pnatal_issue6 = 1 if strpos(baby_diagnosis_birth, "P3`d'") > 0
	}

	* P50-P59 plus P60 and P61 (P60 is checked just before P50, P61 just
	* before P51)
	quietly generate pnatal_issue7 = 0
	label variable pnatal_issue7 "P50-P61"
	forvalues d = 0/9 {
		if `d' <= 1 replace pnatal_issue7 = 1 if strpos(baby_diagnosis_birth, "P6`d'") > 0
		replace pnatal_issue7 = 1 if strpos(baby_diagnosis_birth, "P5`d'") > 0
	}

	* P70-P74
	quietly generate pnatal_issue8 = 0
	label variable pnatal_issue8 "P70-P74"
	forvalues d = 0/4 {
		replace pnatal_issue8 = 1 if strpos(baby_diagnosis_birth, "P7`d'") > 0
	}

	* P76-P78
	quietly generate pnatal_issue9 = 0
	label variable pnatal_issue9 "P76-P78"
	forvalues d = 6/8 {
		replace pnatal_issue9 = 1 if strpos(baby_diagnosis_birth, "P7`d'") > 0
	}

	* P80-P83
	quietly generate pnatal_issue10 = 0
	label variable pnatal_issue10 "P80-P83"
	forvalues d = 0/3 {
		replace pnatal_issue10 = 1 if strpos(baby_diagnosis_birth, "P8`d'") > 0
	}

	* P84
	quietly generate pnatal_issue11 = 0
	label variable pnatal_issue11 "P84"
	replace pnatal_issue11 = 1 if strpos(baby_diagnosis_birth, "P84") > 0

	* P90-P96
	quietly generate pnatal_issue12 = 0
	label variable pnatal_issue12 "P90-P96"
	forvalues d = 0/6 {
		replace pnatal_issue12 = 1 if strpos(baby_diagnosis_birth, "P9`d'") > 0
	}

	* Q0-Q8: Q codes other than the Q9 group
	quietly generate congen_deform = 0
	forvalues d = 0/8 {
		replace congen_deform = 1 if strpos(baby_diagnosis_birth, "Q`d'") > 0
	}

end


program merge_miscarriages
	* Flags pregnancies that were preceded by a miscarriage record.
	*   1. For each of two registers (outpatient, inpatient): keep records
	*      whose diagnosis string or main diagnosis (hdia) contains O00, O01,
	*      O02 or O03, reduce to one row per woman and day, save a temp file.
	*   2. Stack both registers, parse the date, save miscarriages.dta.
	*   3. Collapse to each woman's first miscarriage date (any diagnosis, and
	*      main diagnosis only) and merge onto mfr_analysis_plus_issues.dta.
	*   4. dv_prev_mis / dv_prev_mis_hdia = 1 when that first date is earlier
	*      than date_preg - 280; save mfr_analysis_plus_issues_mis.dta.
	di "grab miscarriages from outpatient and inpatient registers"
	forvalues i = 1/2 {

		if `i'==1 {
			di "open raw outpatient register"
			use "MYPATH\MYPATH.dta", clear
			drop indatum
			cap drop pvard
			append using "MYPATH\MYPATH.dta", force
			gen source = "outpatient"
		}
		else if `i'==2 {
			di "open raw inpatient register"
			use "MYPATH\MYPATH.dta", clear
			drop indatum utdatuma utdatum op mvo sjukhus lt_klin sjukhusnamn
			cap drop pvard
			append using "MYPATH\MYPATH.dta", force
			gen source = "inpatient"
		}
		di "keep miscarriage records only"
		* When diagnos is empty, rebuild it from the dia1-dia30 fields
		gen diagnos_temp = ""
		forval k = 1/30 {
			replace diagnos_temp = diagnos_temp + "" + dia`k' if !mi(dia`k')
		}
		replace diagnos = diagnos_temp if mi(diagnos)
		drop diagnos_temp
		gen miscarriage = 0
			replace miscarriage = 1 if strpos(diagnos, "O00")>0
			replace miscarriage = 1 if strpos(diagnos, "O01")>0
			replace miscarriage = 1 if strpos(diagnos, "O02")>0
			replace miscarriage = 1 if strpos(diagnos, "O03")>0
		di "got here"
		gen hdia_miscarriage = 0
			replace hdia_miscarriage = 1 if strpos(hdia, "O00")>0
			replace hdia_miscarriage = 1 if strpos(hdia, "O01")>0
			replace hdia_miscarriage = 1 if strpos(hdia, "O02")>0
			replace hdia_miscarriage = 1 if strpos(hdia, "O03")>0
		keep if miscarriage == 1 | hdia_miscarriage == 1

		di "drop if missing date - I don't see any better way to deal with this"
		drop if indatuma==""

		di "only need indicator of past miscarriage --> drop duplicates in patient registers (multiple miscarriages on same day)"
		* Same-day records can disagree on the two flags. Dropping an arbitrary
		* duplicate would make the surviving flags depend on an unstable sort,
		* so first give every same-day row the maximum of each flag: the row
		* that is kept then says 1 whenever any record of that day did.
		foreach flag in miscarriage hdia_miscarriage {
			bysort lopnr indatuma (`flag'): replace `flag' = `flag'[_N]
		}
		bysort lopnr indatuma: gen cont = _n
		by lopnr indatuma: gen tot = _N
		tab tot
		drop if cont>1

		di "save"
		keep lopnr indatuma miscarriage hdia_miscarriage source
		sort lopnr indatuma
		isid lopnr indatuma
		qui compress
		save "$TEMP\derived\build_model_sample\miscarriages_`i'.dta", replace

	}

	di "append miscarriages from outpatient and inpatient registers + fix date"
	use $TEMP\derived\build_model_sample\miscarriages_1.dta, clear
	append using $TEMP\derived\build_model_sample\miscarriages_2.dta
	gen date_mis = daily(indatuma, "YMD")
		format date_mis %td
		la var date_mis "date of miscarriage (year, month, day)"
		count if mi(date_mis)
		tab indatuma if mi(date_mis)
		di "note: dropping if missing exact date - not sure of a better way to deal with this"
		drop if mi(date_mis)
		drop indatuma
	sort lopnr date_mis
	save $TEMP\derived\build_model_sample\miscarriages.dta, replace
	rm $TEMP\derived\build_model_sample\miscarriages_1.dta
	rm $TEMP\derived\build_model_sample\miscarriages_2.dta

	di "collapse to date of woman's first miscarriage"
	bysort lopnr: egen first_mis = min(date_mis) if miscarriage == 1
	bysort lopnr: egen first_mis_hdia = min(date_mis) if hdia_miscarriage == 1
	collapse (firstnm) first_mis first_mis_hdia, by(lopnr)

	di "merge analysis+mfr sample to miscarriage sample"
	merge 1:m lopnr using $TEMP\derived\build_model_sample\mfr_analysis_plus_issues.dta, ///
	  assert(1 2 3) keep(2 3) nogen
	* date_expcon is 280 days before date_preg
	* NOTE(review): a missing date_expcon compares as larger than any date, so
	* such a row would be flagged 1 whenever the woman has any miscarriage
	* record - confirm date_preg is never missing at this point.
	gen date_expcon = date_preg - 280
		format date_expcon %td
	gen dv_prev_mis = 1 * (first_mis < date_expcon & !mi(first_mis))
	gen dv_prev_mis_hdia = 1 * (first_mis_hdia < date_expcon & !mi(first_mis_hdia))
	rm $TEMP\derived\build_model_sample\mfr_analysis_plus_issues.dta

	* create vars for next merge
	sort lopnr year pregid_full
	save $TEMP\derived\build_model_sample\mfr_analysis_plus_issues_mis.dta, replace
end


program grab_civil
	* Collects the civil variable from the yearly raw LISA files (2008-2019)
	* into civil.dta, reduces it to one row per (lopnr, year), merges it onto
	* the cleaned LISA panel from 2008 on and derives married / divorced
	* dummies for the current and the previous year.
	* Nothing is saved at the end: the merged panel is left in memory for the
	* next step (merge_lisa).
	* note: this program uses some of the same notation as CivilSnippet.do

	* --- stack the yearly files; 2008 starts the file, later years append ---
	foreach yr of numlist 2008/2015 2017 2018 {
		di "`yr'"
		use "MYPATH\MYPATH_`yr'.dta", clear
		keep lopnr senpnr civil
		generate year = `yr'
		if `yr' != 2008 {
			append using  $TEMP\derived\build_model_sample\civil.dta
		}
		save $TEMP\derived\build_model_sample\civil.dta, replace
	}

	* --- 2016 is read in its own step ---
	local yr 2016
	di "2016"
	use "MYPATH\MYPATH_`yr'.dta", clear
	keep lopnr senpnr civil
	generate year = `yr'
	append using  $TEMP\derived\build_model_sample\civil.dta
	save  $TEMP\derived\build_model_sample\civil.dta, replace

	* --- 2019: only lopnr and civil are kept from this file ---
	di "LISA_2019 real data"
	use "MYPATH\MYPATH.dta", clear
	keep lopnr civil
	generate year = 2019
	append using  $TEMP\derived\build_model_sample\civil.dta
	save $TEMP\derived\build_model_sample\civil.dta, replace

	* --- one row per person-year ---
	* Exact duplicates go first. Of the remaining (year, lopnr) duplicates,
	* rows with senpnr == 0 are dropped, then every duplicated 2019 row.
	sort lopnr year
	duplicates drop
	duplicates tag year lopnr, generate(ddup)
	tabulate ddup
	tabulate ddup year
	drop if ddup != 0 & senpnr == 0
	drop if ddup != 0 & year == 2019
	drop ddup
	isid lopnr year
	save $TEMP\derived\build_model_sample\civil.dta, replace

	* --- merge onto cleaned LISA data ---
	use "MYPATH\MYPATH.dta", clear

	keep if year>=2008
	sort lopnr year
	merge 1:1 lopnr year using $TEMP\derived\build_model_sample\civil.dta, keep(1 3) assert(1 2 3) nogenerate

	sort lopnr year

	* --- relationship status, current year (missing when civil is empty) ---
	generate married = (civil == "G")
	replace married = . if civil == ""
	generate divorced = (civil == "SP" | civil == "S" )
	replace divorced = . if civil == ""
	label variable married "Married in current year"
	label variable divorced "Divorced in current year"

	* --- relationship status, previous year ---
	* filled only when the preceding row of the same person is exactly one
	* year earlier
	foreach status in married divorced {
		by lopnr: generate `status'_prev = `status'[_n-1] if year==year[_n-1]+1
	}
	label variable married_prev "Married in previous year"
	label variable divorced_prev "Divorced in previous year"

	quietly compress
end


program merge_lisa
	* Takes the LISA + civil panel that grab_civil left in memory, fills the
	* gaps in each person's year sequence, keeps the marriage and divorce
	* variables and merges them onto the pregnancy-level file by (lopnr, year).
	* Only rows present in the pregnancy file are kept.
	* note: this program uses some of the same notation as "prep_data" program in Sean\Code\Prenatal\select_observe_kub\analysis.do
	di "prep LISA w/ civil data for merge"
	xtset lopnr year
	tsfill
	sort lopnr year
	keep lopnr year marri* divorc*

	di "merge"
	merge 1:m lopnr year using $TEMP\derived\build_model_sample\mfr_analysis_plus_issues_mis.dta, ///
	  keep(2 3) assert(1 2 3) nogenerate
	erase $TEMP\derived\build_model_sample\mfr_analysis_plus_issues_mis.dta

	sort pregnancy
	save $TEMP\derived\build_model_sample\mfr_analysis_plus_issues_mis_LISA.dta, replace
end


program merge_kids
	* Builds a mother-by-month panel of the number of children born so far and
	* merges it onto the pregnancy-level file by (lopnr, m_year).
	*   num_child      children born up to and including the month
	*   num_prev_kids  num_child two months earlier (L2)
	*   dv_prev_kids   1 when num_prev_kids > 0
	* Births before December 2010 are summarised in one baseline row dated
	* November 2010; later births enter as their own monthly rows. Two helper
	* files (allsampleyears, temp_2019) are written to the TEMP build folder.
	* The merged data are left in memory for the next step.
	use "MYPATH\MYPATH.dta", clear
	keep if birth_flag == 1 | birth_flag == 4
	bysort lopnr bfoddat: gen within_preg = _n
	by lopnr: gen num_child = _n
	* One row per delivery is kept below. Give that row the count reached at
	* the LAST child of the delivery, otherwise a multiple birth adds only one
	* child until the mother's next delivery.
	by lopnr bfoddat: replace num_child = num_child[_N]
	keep if within_preg == 1

	gen bfoddat_month = mod(bfoddat, 100)
	gen bfoddat_yr = floor(bfoddat/100)
	gen m_year = ym(bfoddat_yr, bfoddat_month)
	format m_year %tm

	xtset lopnr m_year
	preserve
	* Keep December 2010 here: the baseline below only covers births before
	* December 2010, so a later cut-off would lose that month's births.
	drop if m_year < ym(2010, 12)
	save $TEMP\derived\build_model_sample\allsampleyears, replace
	restore

	* Baseline row: children born before December 2010, dated November 2010 so
	* that the two-month lag of January 2011 exists
	keep if m_year < ym(2010, 12)
	collapse (max) num_child, by(lopnr)
	gen m_year = ym(2010, 11)
	append using $TEMP\derived\build_model_sample\allsampleyears
	xtset lopnr m_year
	tsfill, full
	replace num_child = 0 if m_year == ym(2010,11) & mi(num_child)

	* Carry the last known count forward. replace works through the rows in
	* order, so a value filled on one row is already available to the next
	* and a single pass fills every gap.
	bysort lopnr (m_year): replace num_child = num_child[_n-1] if mi(num_child)
	assert !mi(num_child)
	gen num_prev_kids = L2.num_child
	keep lopnr m_year num_prev_kids
	keep if m_year >= ym(2011, 1)

	* create rows through 2020 with num kids in dec 2019 bc don't have 2020 MFR *
	* NOTE(review): the value used is the mother's maximum num_prev_kids, which
	* is already lagged by two months - confirm this is the intended count.
	preserve
	collapse (max) num_prev_kids, by(lopnr)
	gen m_year = ym(2020,12)
	save $TEMP\derived\build_model_sample\temp_2019, replace
	restore
	append using $TEMP\derived\build_model_sample\temp_2019
	tsfill, full
	bysort lopnr: egen max_kids = max(num_prev_kids)
	replace num_prev_kids = max_kids if m_year > ym(2019, 12)
	drop max_kids

	merge 1:m lopnr m_year using $TEMP\derived\build_model_sample\mfr_analysis_plus_issues_mis_LISA.dta, ///
	  assert(1 2 3) keep(2 3) nogen
	rm $TEMP\derived\build_model_sample\mfr_analysis_plus_issues_mis_LISA.dta

	* Pregnancies without a matching panel row are treated as no previous kids
	replace num_prev_kids = 0 if mi(num_prev_kids)
	gen dv_prev_kids = 1 * (num_prev_kids > 0)

	qui compress
end

program merge_income_tiles
	* Works on the data in memory. The four income variables are dropped first
	* (presumably so the copies in income_tiles.dta take their place - confirm),
	* then the income tiles are merged in by (lopnr, year). Rows found only in
	* the using file are discarded. The merge does not use nogenerate, so
	* _merge stays in the data.
	local stale_income hh_AGI_inc hh_inc_smooth inc_rank inc_quartile
	drop `stale_income'
	merge m:1 lopnr year using "$DATA\income_tiles.dta", keep(1 3) assert(1 2 3)
end



* Execute: run the whole build by calling program main, which is defined earlier
* in this file and calls every other program in order.
main

